home *** CD-ROM | disk | FTP | other *** search
/ ftp.mactech.com 2010 / ftp.mactech.com.tar / ftp.mactech.com / machack / Hacks97 / NewsTicker.sit / NewsTicker / source code / Extractors / NewsComExtractor.cp < prev    next >
Text File  |  1997-06-26  |  4KB  |  167 lines

  1. /*------------------------------------------------------------------------------
  2. #
  3. #    NewsTicker, my Hack for 1997
  4. #
  5. #    NewsComExtractor.cp   -    Derived from HTMLExtractor, we get passed the tokens
  6. #                            and try to recognize headlines in them.  We parse
  7. #                            the page "www.news.com", the technical news page of
  8. #                            c|net.com.
  9. #
  10. ------------------------------------------------------------------------------*/
  11. #include <string.h>
  12.  
  13. #include "TickerGlobals.h"
  14. #include "NewsComExtractor.h"
  15. #include "HTMLExtractor.h"
  16.  
  // How long a loaded page stays fresh: 20 minutes, expressed in seconds
  // (added to the value returned by GetDateTime).
  #define kNewsPeriod     (20 * 60)

  // Host whose front page is fetched and scanned for headlines.
  #define kNewsAddress    "www.news.com"

  // File-wide state for the News.Com extractor:
  // the clock value at which the next reload becomes due (0 until an
  // extractor has been constructed).
  unsigned long   gNewsNextTime = 0UL;
  25.  
  26. class NEWSExtractor: public HTMLExtractor
  27. {
  28.     protected:
  29.         enum    NewsParser    {    knpParsing,
  30.                                 //text headlines are <f><strong><a>headline
  31.                                 knpHasFont, knpHasStrong, knpHasLinkAndStrong,
  32.                                 knpHasAllForText, knpWaitingForParagraph,
  33.                                 //Graphic headlines are <a><img>
  34.                                 knpHasLink    };
  35.                                 
  36.         NewsParser    mfCurrentState;
  37.         Str255        mfTheURL;
  38.         Boolean        mfInTD;
  39.      
  40.     public:
  41.                         NEWSExtractor(sMyDataPtr theDataPtr);
  42.         virtual        ~NEWSExtractor        (void){ }
  43.         
  44.         virtual void    HandleToken(char* string, short numchars, Boolean isCommand);
  45. };
  46.  
  47. //
  48. // We just parse the entries to find the element
  49. //
  50. NEWSExtractor::NEWSExtractor(sMyDataPtr theDataPtr)
  51.         :HTMLExtractor(kNewsAddress, 1000, theDataPtr)
  52. {
  53.     unsigned long now;
  54.     
  55.     mfCurrentState = knpParsing;    //just waiting for our thing to come through
  56.     mfInTD = false;
  57.     
  58.     GetDateTime(&now);
  59.     gNewsNextTime = now + kNewsPeriod;    //refresh the news every 20 minutes
  60. }
  61.  
  62. void NEWSExtractor::HandleToken(char* string, short numchars, Boolean isCommand)
  63. {
  64.     Str255    thestr;
  65.     
  66.     if (isCommand)
  67.     {
  68.         if (MyCompareStr(string, "<TD "))    //table delimiters mark the image links
  69.             mfInTD = true;
  70.         if (MyCompareStr(string, "</TD "))
  71.         {
  72.             mfInTD = false;
  73.             mfCurrentState = knpParsing;
  74.         }
  75.         
  76.         switch (mfCurrentState)
  77.         {
  78.             case knpParsing:                        //from nothing, we want font or A
  79.                 if (MyCompareStr(string, "<FONT "))
  80.                     mfCurrentState = knpHasFont;
  81.                 
  82.                 else if (MyCompareStr(string, "<A ")&&mfInTD)
  83.                 {
  84.                     if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
  85.                     {
  86.                         mfCurrentState = knpHasLink;
  87.                     }
  88.                     else mfCurrentState = knpParsing;
  89.                 }
  90.                 break;
  91.             case knpHasFont:                        //for this, we only want strong
  92.                 if (MyCompareStr(string, "<STRONG>"))
  93.                     mfCurrentState = knpHasLinkAndStrong;
  94.                 else mfCurrentState = knpParsing;
  95.                 break;
  96.             case knpHasLinkAndStrong:                //for this, we only want <a>
  97.                 if (MyCompareStr(string, "<A "))
  98.                 {
  99.                     if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
  100.                     {
  101.                         mfCurrentState = knpHasAllForText;
  102.                     }
  103.                     else mfCurrentState = knpParsing;
  104.                 }
  105.                 else mfCurrentState = knpParsing;
  106.                 break;
  107.             case knpHasAllForText:
  108.                 mfCurrentState = knpParsing;        //any tag from this position is a failure
  109.                 break;
  110.             case knpWaitingForParagraph:    //OK, from now on, we're only waiting for a <P>
  111.                 if (MyCompareStr(string, "<P>"))
  112.                     mfCurrentState = knpParsing;
  113.                 break;
  114.             case knpHasLink:        //for this, we only want an img, if there's an alt text
  115.                 if (MyCompareStr(string, "<IMG "))
  116.                 {
  117.                     FindATag(string+4, (char*)&thestr[1], "ALT");
  118.                     thestr[0] = strlen( (char*)&thestr[1] );
  119.                     if (thestr[0]>0)
  120.                         AddEntry(thestr, mfTheURL);
  121.                 }
  122.                 mfCurrentState = knpParsing;
  123.                 break;
  124.         }
  125.     }
  126.     else
  127.     {
  128.         if (mfCurrentState==knpHasAllForText)    //OK, get got a headline!
  129.         {
  130.             if (numchars>255)
  131.                 numchars = 255;
  132.             thestr[0] = numchars;
  133.             BlockMove(string, &thestr[1], numchars);
  134.             
  135.             //Add the entry
  136.             AddEntry(thestr, mfTheURL);
  137.             
  138.             mfCurrentState = knpWaitingForParagraph;
  139.         }
  140.         else if (mfCurrentState!=knpWaitingForParagraph)
  141.             mfCurrentState = knpParsing;    //and wait for tne next headline
  142.     }
  143. }
  144.  
  145.  
  146. void LoadNewsCom(sMyDataPtr gGlobalsPtr)
  147. {
  148.     NEWSExtractor* theparser = new NEWSExtractor(gGlobalsPtr);
  149.     
  150.     theparser->ReadEntries();
  151.     delete theparser;
  152.         
  153.     InitCursor();
  154. }
  155.  
  156. // This reloads us if necessary
  157. Boolean MustReloadNewsCom(sMyDataPtr    gGlobalsPtr)
  158. {
  159.     unsigned long now;
  160.     
  161.     GetDateTime(&now);
  162.     
  163.     if (now<gNewsNextTime)    //time to check yet?
  164.         return false;
  165.     
  166.     return true;        //always recheck on the time
  167. }